{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Using Theano backend.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Couldn't import dot_parser, loading of dot files will not be possible.\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Using gpu device 0: GeForce GTX TITAN X (CNMeM is disabled, cuDNN 4007)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "corpus length: 1115394\n",
      "total chars: 39\n",
      "nb sequences: 1115355\n",
      "Vectorization...\n",
      "vetorization completed\n"
     ]
    }
   ],
   "source": [
    "\n",
    "####\n",
    "\n",
    "#minesh.mathew@gmail.com\n",
    "#modified version of text generation example in keras; trained in a many-to-many fashion using a time distributed dense layer\n",
    "\n",
    "####\n",
    "from __future__ import print_function\n",
    "from keras.models import Sequential\n",
    "from keras.layers import Dense, Activation, Dropout\n",
    "from keras.layers import LSTM,TimeDistributedDense,SimpleRNN\n",
    "from keras.utils.data_utils import get_file\n",
    "import numpy as np\n",
    "from time import sleep\n",
    "import random\n",
    "import sys\n",
    "\n",
    "##uncomment below if you want to use nietzches writings as the corpus\n",
    "\n",
    "#path = get_file('nietzsche.txt', origin=\"https://s3.amazonaws.com/text-datasets/nietzsche.txt\")\n",
    "#text = open(path).read().lower()\n",
    "text = open('./textdatasets/tinyshakesepare.txt').read().lower()\n",
    "print('corpus length:', len(text))\n",
    "\n",
    "chars = sorted(list(set(text)))\n",
    "print('total chars:', len(chars))\n",
    "char_indices = dict((c, i) for i, c in enumerate(chars))\n",
    "indices_char = dict((i, c) for i, c in enumerate(chars))\n",
    "\n",
    "# split the corpus into sequences of length=maxlen\n",
    "#input is a sequence of 40 chars and target is also a sequence of 40 chars shifted by one position\n",
    "#for eg: if you maxlen=3 and the text corpus is abcdefghi, your input ---> target pairs will be\n",
    "# [a,b,c] --> [b,c,d], [b,c,d]--->[c,d,e]....and so on\n",
    "maxlen = 40\n",
    "step = 1\n",
    "sentences = []\n",
    "next_chars = []\n",
    "for i in range(0, len(text) - maxlen+1, step):\n",
    "    sentences.append(text[i: i + maxlen])\n",
    "    next_chars.append(text[i+1:i +1+ maxlen])\n",
    "    #if i<10 :\n",
    "       # print (text[i: i + maxlen])\n",
    "        #print(text[i+1:i +1+ maxlen])\n",
    "print('nb sequences:', len(sentences))\n",
    "\n",
    "print('Vectorization...')\n",
    "X = np.zeros((len(sentences), maxlen, len(chars)), dtype=np.bool)\n",
    "y = np.zeros((len(sentences),maxlen, len(chars)), dtype=np.bool) # y is also a sequence , or  a seq of 1 hot vectors\n",
    "for i, sentence in enumerate(sentences):\n",
    "    for t, char in enumerate(sentence):\n",
    "        X[i, t, char_indices[char]] = 1\n",
    "\n",
    "for i, sentence in enumerate(next_chars):\n",
    "    for t, char in enumerate(sentence):\n",
    "        y[i, t, char_indices[char]] = 1\n",
    "    \n",
    "\n",
    "print ('vetorization completed')\n",
    "\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Build model...\n",
      "model is made\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/users/minesh.mathew/.local/lib/python2.7/site-packages/keras/layers/core.py:1014: UserWarning: TimeDistributedDense is deprecated, please use TimeDistributed(Dense(...)) instead.\n",
      "  warnings.warn('TimeDistributedDense is deprecated, '\n"
     ]
    }
   ],
   "source": [
    "# build the model: 2 stacked LSTM\n",
    "print('Build model...')\n",
    "model = Sequential()\n",
    "#model.add(LSTM(512, return_sequences=True, input_shape=(maxlen, len(chars))))  # original one\n",
    "model.add(LSTM(512, input_dim=len(chars),return_sequences=True)) #minesh witout specifying the input_length\n",
    "model.add(LSTM(512, return_sequences=True)) #- original\n",
    "model.add(Dropout(0.2))\n",
    "model.add(TimeDistributedDense(len(chars)))\n",
    "model.add(Activation('softmax'))\n",
    "\n",
    "model.compile(loss='categorical_crossentropy', optimizer='rmsprop')\n",
    "\n",
    "print ('model is made')\n",
    "\n",
    "# train the model, output generated text after each iteration"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "____________________________________________________________________________________________________\n",
      "Layer (type)                     Output Shape          Param #     Connected to                     \n",
      "====================================================================================================\n",
      "lstm_1 (LSTM)                    (None, None, 512)     1130496     lstm_input_1[0][0]               \n",
      "____________________________________________________________________________________________________\n",
      "lstm_2 (LSTM)                    (None, None, 512)     2099200     lstm_1[0][0]                     \n",
      "____________________________________________________________________________________________________\n",
      "dropout_1 (Dropout)              (None, None, 512)     0           lstm_2[0][0]                     \n",
      "____________________________________________________________________________________________________\n",
      "timedistributeddense_1 (TimeDistr(None, None, 39)      20007       dropout_1[0][0]                  \n",
      "____________________________________________________________________________________________________\n",
      "activation_1 (Activation)        (None, None, 39)      0           timedistributeddense_1[0][0]     \n",
      "====================================================================================================\n",
      "Total params: 3249703\n",
      "____________________________________________________________________________________________________\n",
      "None\n"
     ]
    }
   ],
   "source": [
    "print (model.summary())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "for iteration in range(1, 6):\n",
    "    print()\n",
    "    print('-' * 50)\n",
    "    print('Iteration', iteration)\n",
    "    history=model.fit(X, y, batch_size=128, nb_epoch=1,verbose=0)\n",
    "    sleep(0.1) # https://github.com/fchollet/keras/issues/2110\n",
    "    \n",
    "    # saving models at the following iterations -- uncomment it if you want tos save weights and load it later\n",
    "    #if iteration==1 or iteration==3 or iteration==5 or iteration==10 or iteration==20 or iteration==30 or iteration==50 or iteration==60 :\n",
    "    #    model.save_weights('Karpathy_LSTM_weights_'+str(iteration)+'.h5', overwrite=True)\n",
    "    #start_index = random.randint(0, len(text) - maxlen - 1)\n",
    "\n",
    "    #sys.stdout.flush()\n",
    "    print ('loss is')\n",
    "    print (history.history['loss'][0])\n",
    "    print (history)\n",
    "    print()    \n",
    "\n",
    "    "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### testing\n",
    "now you use the trained model to generat text.\n",
    "the  output shown in this notebook is for a model which is trained only for 1 iteration"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "seed string --> brutus:\n",
      "The generated text is\n",
      "brutus:\n",
      "ay, and he shall go.\n",
      "\n",
      "balthasar:\n",
      "i will be brief.\n",
      "\n",
      "angelo:\n",
      "the set of thing can speak to the people,\n",
      "because he that dole, commit be no more to be prevail'd,\n",
      "that i am call'd for a perpetual.\n",
      "\n",
      "hortensio:\n",
      "sir, the strangeness of your dear lord,\n",
      "the common people with my heart as well as old.\n",
      "\n",
      "capulet:\n",
      "what is become of"
     ]
    }
   ],
   "source": [
    "seed_string=\"brutus:\"\n",
    "print (\"seed string -->\", seed_string)\n",
    "print ('The generated text is')\n",
    "sys.stdout.write(seed_string),\n",
    "#x=np.zeros((1, len(seed_string), len(chars)))\n",
    "for i in range(320):\n",
    "    x=np.zeros((1, len(seed_string), len(chars)))\n",
    "    for t, char in enumerate(seed_string):\n",
    "        x[0, t, char_indices[char]] = 1.\n",
    "    preds = model.predict(x, verbose=0)[0]\n",
    "    #print (np.argmax(preds[7]))\n",
    "    next_index=np.argmax(preds[len(seed_string)-1])\n",
    "    \n",
    "    \n",
    "    #next_index=np.argmax(preds[len(seed_string)-11])\n",
    "    #print (preds.shape)\n",
    "    #print (preds)\n",
    "    #next_index = sample(preds, 1) #diversity is 1\n",
    "    next_char = indices_char[next_index]\n",
    "    seed_string = seed_string + next_char\n",
    "    \n",
    "    #print (seed_string)\n",
    "    #print ('##############')\n",
    "    #if i==40:\n",
    "    #    print ('####')\n",
    "    sys.stdout.write(next_char)\n",
    "\n",
    "sys.stdout.flush()    \n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
